-- -*-Haskell-*-

{-
 * Copyright (c) 2007 Gabor Greif
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
 * OR OTHER DEALINGS IN THE SOFTWARE.
 -}

-- Usage:
--
--  Set up the environment
--     setenv LD_LIBRARY_PATH /opt/exp/gnu/lib:$LD_LIBRARY_PATH
--     setenv PATH /home/ggreif/%NoBackup%/Omega1.4.1:$PATH
--
--  Start the omega interpreter by typing
--     omega ThristParser.omg
--  and run your commands on the prompt
--

import "LangPrelude.prg" 
  (head,tail,lookup,member,fst,snd,map,Monad,maybeM,id,ioM,Row,HasType,RCons,RNil,Eq,Equal,
  listM, foldl, foldr)


data Thrist :: forall (l :: *1) . (l ~> l ~> *)  ~> l ~> l ~> * where
  Nil :: Thrist k a a
  Cons :: k c b -> Thrist k b a -> Thrist k c a
 deriving List(l)


data Parse :: *0 ~> *0 ~> *0 where
  Atom :: Char -> Parse Char Char       -- exact match
  Pred :: (a -> Bool) -> Parse a a      -- conditional match
  Sure :: (a -> b) -> Parse a b         -- always matches and converts
  Try :: (a -> Maybe b) -> Parse a b    -- pipeline stops if no match
  Rep :: Parse a b -> Parse [a] ([b], [a])     -- consume as many as matches, return rest
  Group :: [Parse a b] -> Parse [a] ([b], [a]) -- all must match, return rest
  CataStar :: ([a] -> b) -> Parse [a] b -- collapse one or more elements
  CataPlus :: ([a] -> b) -> Parse [a] b -- collapse zero or more elements
  Wrap :: Thrist Parse a b -> Parse a b -- treat a chain of parses as one
  Or :: Parse a b -> Parse a b -> Parse a b    -- match first or if it doesn't the second
  Seq :: Parse [a] (b, [a]) -> Parse [a] (c, [a]) -> Parse [a] ((b, c), [a]) -- parse front first then second
  Seq2 :: Parse a b -> Parse a c -> Parse [a] ((b, c), [a]) -- same, but with single-elem first and second


letter = (Atom :: Char -> Parse Char Char)
char = Sure (id :: Char -> Char)

digit = Try (\c -> if ord '0' <= ord c && ord c <= ord '9' then Just (ord c - ord '0') else Nothing)

hexdigit = Try (\c -> if ord '0' <= ord c && ord c <= ord '9' then Just (ord c - ord '0') else
                      if ord 'a' <= ord c && ord c <= ord 'f' then Just (ord c - ord 'W') else
                      if ord 'A' <= ord c && ord c <= ord 'F' then Just (ord c - ord '7') else Nothing)

crop = Try (\(good, rest) -> Just good)
snap = Try (\(good, rest) -> case rest of { [] -> Just good; _ -> Nothing })

proper = Try (\(it, rest) -> case it of { [] -> Nothing; _ -> Just (it, rest) })
propersnap = Try (\a -> case a of { ([], _) -> Nothing; (_, r:rest) -> Nothing; _ -> Just a })

proper' p = Wrap #[p, proper]l
propersnap' p = Wrap #[p, propersnap]l

base n = foldl (\acc x -> (acc * n) + x) 0
num = Wrap #[Rep digit, crop, CataPlus (base 10)]l
chexnum = Wrap #[Seq (Seq2 (letter '0') (Or (letter 'x') (letter 'X'))) (Wrap #[Rep hexdigit, propersnap]l), second, CataPlus (base 16)]l

unwrap (Wrap x) = x

second = Sure (\((_, x), _) -> x)

word cs = Group (map letter cs)

-- ###############################
-- ############ parse ############
-- ###############################

parse :: Thrist Parse a b -> a -> Maybe b

parse Nil a = Just a

parse (Cons (Atom c) r) b = if ord c == ord b then parse r b else Nothing

parse (Cons (Sure f) r) a = parse r (f a)

parse (Cons (Try f) r) a = do { b <- f a; parse r b } where monad maybeM

parse (Cons (Rep p) r) as = parse r (parseRep p as) where
      parseRep :: Parse a b -> [a] -> ([b], [a])
      parseRep _ [] = ([], [])
      parseRep p (a:as) = case parse #[p]l a of { Nothing -> ([], a:as); Just b -> (b:bs, rest) where (bs, rest) = parseRep p as }

parse (Cons (Group ps) r) as = do { bs <- parseGroup ps as; parse r bs } where
      monad maybeM
      parseGroup :: [Parse a b] -> [a] -> Maybe ([b], [a])
      parseGroup [] rest = Just ([], rest) -- overlength input
      parseGroup _ [] = Nothing            -- input too short
      parseGroup (p:ps) (a:as) = do { b <- parse #[p]l a; (bs, rest) <- parseGroup ps as; return (b:bs, rest) }

parse (Cons (CataStar f) r) a = parse r (f a)

parse (Cons (CataPlus _) _) [] = Nothing

parse (Cons (CataPlus f) r) a = parse r (f a)

parse (Cons (Wrap thr) r) a = do { a' <- parse thr a; parse r a' } where monad maybeM

parse (Cons (Seq2 p1 p2) r) (a1:a2:rest) = do { b1 <- parse #[p1]l a1; b2 <- parse #[p2]l a2; parse r ((b1, b2), rest) } where monad maybeM

parse (Cons (Seq p1 p2) r) as = do { (b1, as') <- parse #[p1]l as; (b2, rest) <- parse #[p2]l as'; parse r ((b1, b2), rest) } where monad maybeM

parse (Cons (Or p1 p2) r) a = case parse #[p1]l a of
      (Just b1) -> parse r b1
      Nothing -> do { b2 <- parse #[p2]l a; parse r b2 } where monad maybeM

-- ##########################################
-- ############ pattern compiler ############
-- ##########################################

compileParse :: Thrist Parse a b -> Code (a -> Maybe b)
compileParse Nil = [| Just |]
compileParse (Cons (Atom c) r) = [| \b -> if ord c == ord b then parse r b else Nothing |]

compileParse (Cons (Or p1 p2) r) = [| \a ->
				    case parse #[p1]l a of
				    (Just b1) -> parse r b1
				    Nothing -> let monad maybeM in do { b2 <- parse #[p2]l a; parse r b2 } |]

tbug0 = (run compileParse $ #[Or (letter 's') (letter 'm')]l) 's'

##test "Unknown Var at level 0: bind"
 tbug1 = (run compileParse $ #[Or (letter 's') (letter 'm')]l) 'm'

-- ###############################
-- ############ tests ############
-- ###############################

t1 = parse #[Seq (Wrap #[Rep $ letter 'a', proper]l) (Seq (Rep $ letter 'b') (Rep (letter 'c')))]l "abbc"
t1a = parse #[Seq (proper' $ Rep $ letter 'a') (Seq (Rep $ letter 'b') (Rep (letter 'c')))]l "abbc"
t2 = parse (unwrap chexnum) "0xFF"
t3 = Group [letter 'a', letter 'b', letter 'c', letter 'd']
t3a = word "abcd"




